## Calling different Libraries in python to work on the Determine certain matrices
## to identify the star restaurants and generate recommendations.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt,seaborn as sns
%matplotlib inline
import warnings
resturant_data=pd.read_excel('data.xlsx') ## uploading resturant data
resturant_data.head()
| Restaurant ID | Restaurant Name | Country Code | City | Address | Locality | Locality Verbose | Longitude | Latitude | Cuisines | Average Cost for two | Currency | Has Table booking | Has Online delivery | Price range | Aggregate rating | Rating color | Rating text | Votes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 7402935 | Skye | 94 | Jakarta | Menara BCA, Lantai 56, Jl. MH. Thamrin, Thamri... | Grand Indonesia Mall, Thamrin | Grand Indonesia Mall, Thamrin, Jakarta | 106.821999 | -6.196778 | Italian, Continental | 800000 | Indonesian Rupiah(IDR) | No | No | 3 | 4.1 | Green | Very Good | 1498 |
| 1 | 7410290 | Satoo - Hotel Shangri-La | 94 | Jakarta | Hotel Shangri-La, Jl. Jend. Sudirman | Hotel Shangri-La, Sudirman | Hotel Shangri-La, Sudirman, Jakarta | 106.818961 | -6.203292 | Asian, Indonesian, Western | 800000 | Indonesian Rupiah(IDR) | No | No | 3 | 4.6 | Dark Green | Excellent | 873 |
| 2 | 7420899 | Sushi Masa | 94 | Jakarta | Jl. Tuna Raya No. 5, Penjaringan | Penjaringan | Penjaringan, Jakarta | 106.800144 | -6.101298 | Sushi, Japanese | 500000 | Indonesian Rupiah(IDR) | No | No | 3 | 4.9 | Dark Green | Excellent | 605 |
| 3 | 7421967 | 3 Wise Monkeys | 94 | Jakarta | Jl. Suryo No. 26, Senopati, Jakarta | Senopati | Senopati, Jakarta | 106.813400 | -6.235241 | Japanese | 450000 | Indonesian Rupiah(IDR) | No | No | 3 | 4.2 | Green | Very Good | 395 |
| 4 | 7422489 | Avec Moi Restaurant and Bar | 94 | Jakarta | Gedung PIC, Jl. Teluk Betung 43, Thamrin, Jakarta | Thamrin | Thamrin, Jakarta | 106.821023 | -6.196270 | French, Western | 350000 | Indonesian Rupiah(IDR) | No | No | 3 | 4.3 | Green | Very Good | 243 |
country_code=pd.read_excel('country-code.xlsx') ## uploading countru code
country_code.head()
| Country Code | Country | |
|---|---|---|
| 0 | 1 | India |
| 1 | 14 | Australia |
| 2 | 30 | Brazil |
| 3 | 37 | Canada |
| 4 | 94 | Indonesia |
working_data=pd.merge(resturant_data,country_code,on='Country Code',how='left')
working_data.head()
| Restaurant ID | Restaurant Name | Country Code | City | Address | Locality | Locality Verbose | Longitude | Latitude | Cuisines | Average Cost for two | Currency | Has Table booking | Has Online delivery | Price range | Aggregate rating | Rating color | Rating text | Votes | Country | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 7402935 | Skye | 94 | Jakarta | Menara BCA, Lantai 56, Jl. MH. Thamrin, Thamri... | Grand Indonesia Mall, Thamrin | Grand Indonesia Mall, Thamrin, Jakarta | 106.821999 | -6.196778 | Italian, Continental | 800000 | Indonesian Rupiah(IDR) | No | No | 3 | 4.1 | Green | Very Good | 1498 | Indonesia |
| 1 | 7410290 | Satoo - Hotel Shangri-La | 94 | Jakarta | Hotel Shangri-La, Jl. Jend. Sudirman | Hotel Shangri-La, Sudirman | Hotel Shangri-La, Sudirman, Jakarta | 106.818961 | -6.203292 | Asian, Indonesian, Western | 800000 | Indonesian Rupiah(IDR) | No | No | 3 | 4.6 | Dark Green | Excellent | 873 | Indonesia |
| 2 | 7420899 | Sushi Masa | 94 | Jakarta | Jl. Tuna Raya No. 5, Penjaringan | Penjaringan | Penjaringan, Jakarta | 106.800144 | -6.101298 | Sushi, Japanese | 500000 | Indonesian Rupiah(IDR) | No | No | 3 | 4.9 | Dark Green | Excellent | 605 | Indonesia |
| 3 | 7421967 | 3 Wise Monkeys | 94 | Jakarta | Jl. Suryo No. 26, Senopati, Jakarta | Senopati | Senopati, Jakarta | 106.813400 | -6.235241 | Japanese | 450000 | Indonesian Rupiah(IDR) | No | No | 3 | 4.2 | Green | Very Good | 395 | Indonesia |
| 4 | 7422489 | Avec Moi Restaurant and Bar | 94 | Jakarta | Gedung PIC, Jl. Teluk Betung 43, Thamrin, Jakarta | Thamrin | Thamrin, Jakarta | 106.821023 | -6.196270 | French, Western | 350000 | Indonesian Rupiah(IDR) | No | No | 3 | 4.3 | Green | Very Good | 243 | Indonesia |
working_data.shape ## finding the shape of new matrix
(9551, 20)
working_data.dtypes
Restaurant ID int64 Restaurant Name object Country Code int64 City object Address object Locality object Locality Verbose object Longitude float64 Latitude float64 Cuisines object Average Cost for two int64 Currency object Has Table booking object Has Online delivery object Price range int64 Aggregate rating float64 Rating color object Rating text object Votes int64 Country object dtype: object
working_data.isnull().sum()
Restaurant ID 0 Restaurant Name 1 Country Code 0 City 0 Address 0 Locality 0 Locality Verbose 0 Longitude 0 Latitude 0 Cuisines 9 Average Cost for two 0 Currency 0 Has Table booking 0 Has Online delivery 0 Price range 0 Aggregate rating 0 Rating color 0 Rating text 0 Votes 0 Country 0 dtype: int64
working_data=working_data.dropna(how='any')
print(working_data.isnull().sum())
Restaurant ID 0 Restaurant Name 0 Country Code 0 City 0 Address 0 Locality 0 Locality Verbose 0 Longitude 0 Latitude 0 Cuisines 0 Average Cost for two 0 Currency 0 Has Table booking 0 Has Online delivery 0 Price range 0 Aggregate rating 0 Rating color 0 Rating text 0 Votes 0 Country 0 dtype: int64
working_data.duplicated()
0 False
1 False
2 False
3 False
4 False
...
9546 False
9547 False
9548 False
9549 False
9550 False
Length: 9541, dtype: bool
working_data.duplicated().sum() ## finding total numbers of duplicates in each column
0
working_data1=working_data.rename(columns={'Restaurant ID':'resturant_id','Restaurant Name':'restaurant_name','City':'city',
'Country Code':'country_code','Average Cost for two':'average_cost2',
'Has Table booking':'table_booking','Has Online delivery':'deliver_online',
'Price range':'price_range',
'Aggregate rating':'agg_rating','Rating text':'rating_text','Votes':'votes',
'Country':'country' })
## modifying the name of rows and columns for ease of working
working_data1.columns ## checking the modification of columns
Index(['resturant_id', 'restaurant_name', 'country_code', 'city', 'Address',
'Locality', 'Locality Verbose', 'Longitude', 'Latitude', 'Cuisines',
'average_cost2', 'Currency', 'table_booking', 'deliver_online',
'price_range', 'agg_rating', 'Rating color', 'rating_text', 'votes',
'country'],
dtype='object')
working_data1.city.value_counts() # finding the number of restaurants city wise
New Delhi 5473
Gurgaon 1118
Noida 1080
Faridabad 251
Ghaziabad 25
...
Consort 1
Lincoln 1
Monroe 1
Potrero 1
Lakes Entrance 1
Name: city, Length: 140, dtype: int64
value_counts=working_data1.city.value_counts() # graphical represntation of number of restaurants city wise
filtered_value=value_counts[value_counts>20]
plt.figure(figsize=(12,6))
plt.bar(filtered_value.index,filtered_value.values)
plt.xlabel('city')
plt.ylabel('Num of restaurants')
plt.title('city wise distribution of restaurants')
plt.show()
working_data1.country.value_counts() # finding the number of resturant by country
India 8651 United States 425 United Kingdom 80 South Africa 60 UAE 60 Brazil 60 New Zealand 40 Turkey 34 Australia 24 Phillipines 22 Indonesia 21 Sri Lanka 20 Qatar 20 Singapore 20 Canada 4 Name: country, dtype: int64
value_counts=working_data1.country.value_counts() # graphical representation of number of resturants by country
plt.figure(figsize=(20,8))
plt.bar(value_counts.index,value_counts.values)
plt.xlabel('country')
plt.ylabel('Num of restaurants')
plt.title('country wise distribution of restaurants')
plt.xticks(rotation=90)
plt.show()
working_data1.restaurant_name.value_counts() # finding the count for franchise
Cafe Coffee Day 83
Domino's Pizza 79
Subway 63
Green Chick Chop 51
McDonald's 48
..
Mudrika Food Factory 1
Kavita's Restaurant 1
Sam and Scrooge 1
Saheb's Restaurant 1
VNS Live Studio 1
Name: restaurant_name, Length: 7436, dtype: int64
value_counts=working_data1.restaurant_name.value_counts() # graphical representation of franchise by count
filtered_value=value_counts[value_counts>20]
plt.figure(figsize=(20,10))
plt.bar(filtered_value.index,filtered_value.values)
plt.xlabel('restaurant name')
plt.ylabel('Num of franchise')
plt.title('Restaurants with more than 20 fanchise')
plt.xticks(rotation=90)
plt.show()
has_booking=working_data1.table_booking.value_counts() # finding the count of restaurants which have table bookingsvs dont
has_booking
No 8383 Yes 1158 Name: table_booking, dtype: int64
booking_ratio=has_booking[1]/has_booking[0] # finding the table booking ratio
print('Ratio of restaurants haning bookings',booking_ratio)
Ratio of restaurants haning bookings 0.13813670523678873
plt.bar(['No table booking','Table Booking'],has_booking) # graphical represtation of table booking data
plt.xlabel('Table Booking')
plt.ylabel ('Number of Restaurants')
plt.title('Chart for restaurants having table bookings vs not having')
plt.show()
delivery=working_data1.deliver_online.value_counts() # finding count of restaurants providing online delivery vs dont
delivery=working_data1.deliver_online.value_counts() ## finding the percentage of restaurants giving online delivery
delivery_avilable=delivery[1]
Num_of_restaurants=working_data1.resturant_id.count()
print('Number of restaurants having online delivery',delivery_avilable)
print ('Total Number of restaurants',Num_of_restaurants)
print ('percentage of restaurants having online delivery',(delivery_avilable/Num_of_restaurants)*100)
Number of restaurants having online delivery 2451 Total Number of restaurants 9541 percentage of restaurants having online delivery 25.68913111833141
delivery=working_data1.deliver_online.value_counts() # graph reprentation of online delivery vs dont
plt.bar(['No Online delivery','Online Delivery'],delivery)
plt.xlabel('Restaurants having online delivery vs those that do not')
plt.ylabel('Number of Restaurants')
plt.title('Graph for online delivery of restaurants')
plt.show()
deliver_votes=working_data1.loc[working_data1['deliver_online']=='Yes','votes'].sum() # finding difference in votes of online delivery vs dont
print('online delivery total votes',deliver_votes)
no_deliver_votes=working_data1.loc[working_data1['deliver_online']=='No','votes'].sum()
print('no delivery total votes',no_deliver_votes)
difference_votes=[no_deliver_votes-deliver_votes]
print('Difference in votes for delivery and non delivery',difference_votes)
online delivery total votes 517914 no delivery total votes 977236 Difference in votes for delivery and non delivery [459322]
grouped_data = working_data1.groupby('deliver_online') # Get the number of votes for each group
votes_with_delivery = grouped_data.get_group('Yes')['votes'].sum()
votes_without_delivery = grouped_data.get_group('No')['votes'].sum()
# Create a bar chart
plt.bar(['With Delivery', 'Without Delivery'], [votes_with_delivery, votes_without_delivery])
plt.xlabel('Online Delivery')
plt.ylabel('Number of Votes')
plt.title('Number of Votes with and without Online Delivery')
plt.show()
top_cuisines=working_data1.Cuisines.value_counts() # displaying top 10 cuisines
top_cuisines.head(10)
North Indian 936 North Indian, Chinese 511 Chinese 354 Fast Food 354 North Indian, Mughlai 334 Cafe 299 Bakery 218 North Indian, Mughlai, Chinese 197 Bakery, Desserts 170 Street Food 149 Name: Cuisines, dtype: int64
top_cuisines=working_data.Cuisines.value_counts().head(10)
plt.bar(top_cuisines.index,top_cuisines.values)
plt.xlabel('Cousines')
plt.ylabel('count')
plt.title('Top 10 cuisines with count')
plt.xticks(rotation=90)
plt.show()
cuisines_count = working_data1.Cuisines.value_counts().head(3) # finding the top 3 leading cuisines
cuisines_count
North Indian 936 North Indian, Chinese 511 Chinese 354 Name: Cuisines, dtype: int64
grouped_data = working_data1.groupby('city') # getting most served cuisines across restaurants for each city
for city, group in grouped_data:
top_cuisine = group['Cuisines'].value_counts().idxmax()
print(f"Most served cuisine in {city}: {top_cuisine}")
Most served cuisine in Abu Dhabi: Indian Most served cuisine in Agra: North Indian, Mughlai Most served cuisine in Ahmedabad: Continental, Chinese, North Indian Most served cuisine in Albany: Japanese, Steak, Sushi Most served cuisine in Allahabad: North Indian, Chinese Most served cuisine in Amritsar: North Indian Most served cuisine in Ankara: Kebab, Turkish Pizza Most served cuisine in Armidale: Bar Food, Steak Most served cuisine in Athens: American, Italian, Pizza Most served cuisine in Auckland: European Most served cuisine in Augusta: Mexican Most served cuisine in Aurangabad: North Indian Most served cuisine in Balingup: Modern Australian Most served cuisine in Bandung: Cafe, Coffee and Tea, Western Most served cuisine in Bangalore: Pizza, Cafe, Italian Most served cuisine in Beechworth: Pizza, Bar Food Most served cuisine in Bhopal: Fast Food Most served cuisine in Bhubaneshwar: Chinese Most served cuisine in Birmingham: Italian Most served cuisine in Bogor: Peranakan, Indonesian Most served cuisine in Boise: Pizza Most served cuisine in BrasÌ_lia: Italian Most served cuisine in Cape Town: Seafood, Japanese, Sushi Most served cuisine in Cedar Rapids/Iowa City: American, Breakfast, Burger Most served cuisine in Chandigarh: Italian Most served cuisine in Chatham-Kent: Japanese, Sushi Most served cuisine in Chennai: North Indian, Mughlai, Chinese, South Indian Most served cuisine in Clatskanie: American, Breakfast, Desserts Most served cuisine in Cochrane: Asian, Japanese Most served cuisine in Coimbatore: Italian, North Indian, Desserts Most served cuisine in Colombo: Seafood, Italian Most served cuisine in Columbus: American Most served cuisine in Consort: Chinese, Canadian Most served cuisine in Dalton: American, BBQ, Southern Most served cuisine in Davenport: Mexican Most served cuisine in Dehradun: North Indian, Chinese Most served cuisine in Des Moines: American, Italian Most served cuisine in Dicky Beach: Coffee and Tea, Tea, Modern Australian Most served cuisine in Doha: Indian Most served cuisine in Dubai: Indian Most served cuisine in Dubuque: Mexican Most served cuisine in East Ballina: Cafe Most served cuisine in Edinburgh: American Most served cuisine in Faridabad: North Indian Most served cuisine in Fernley: Mexican Most served cuisine in Flaxton: Tea, Modern Australian Most served cuisine in Forrest: Cafe, Australian Most served cuisine in Gainesville: American, Southern Most served cuisine in Ghaziabad: Cafe Most served cuisine in Goa: Finger Food Most served cuisine in Gurgaon: North Indian Most served cuisine in Guwahati: Cafe Most served cuisine in Hepburn Springs: Cafe, Coffee and Tea, Modern Australian Most served cuisine in Huskisson: Breakfast, Modern Australian Most served cuisine in Hyderabad: Mexican, American, Tex-Mex, Burger Most served cuisine in Indore: North Indian, Chinese Most served cuisine in Inner City: European, Contemporary Most served cuisine in Inverloch: Burger, Coffee and Tea, Modern Australian Most served cuisine in Jaipur: Chinese, Asian Most served cuisine in Jakarta: Sunda, Indonesian Most served cuisine in Johannesburg: Continental, South African, Beverages, Desserts, Seafood, Grill, Ice Cream, International Most served cuisine in Kanpur: North Indian, Chinese Most served cuisine in Kochi: Cafe, Continental, Italian Most served cuisine in Kolkata: North Indian, Chinese Most served cuisine in Lakes Entrance: Breakfast, Coffee and Tea Most served cuisine in Lakeview: Burger, Desserts, Sandwich Most served cuisine in Lincoln: Thai Most served cuisine in London: American, Burger Most served cuisine in Lorn: Breakfast, Coffee and Tea Most served cuisine in Lucknow: Cafe, Fast Food Most served cuisine in Ludhiana: North Indian, Chinese, Continental Most served cuisine in Macedon: Cafe Most served cuisine in Macon: Japanese, Sushi, Thai Most served cuisine in Makati City: Japanese Most served cuisine in Manchester: Italian Most served cuisine in Mandaluyong City: Seafood, Asian, Filipino, Indian Most served cuisine in Mangalore: Seafood Most served cuisine in Mayfield: Asian Most served cuisine in Mc Millan: Breakfast, Burger Most served cuisine in Middleton Beach: Bar Food, Modern Australian Most served cuisine in Mohali: Continental, North Indian Most served cuisine in Monroe: Italian, Pizza Most served cuisine in Montville: Coffee and Tea, Modern Australian Most served cuisine in Mumbai: Cafe, Italian, Desserts, Fast Food, Chinese, Tea Most served cuisine in Mysore: North Indian, Chinese, South Indian Most served cuisine in Nagpur: Cafe Most served cuisine in Nashik: North Indian Most served cuisine in New Delhi: North Indian Most served cuisine in Noida: North Indian Most served cuisine in Ojo Caliente: American, International, Southwestern Most served cuisine in Orlando: Brazilian, Steak Most served cuisine in Palm Cove: Mediterranean, Seafood Most served cuisine in Panchkula: North Indian, Middle Eastern, Pizza Most served cuisine in Pasay City: European, Asian, Indian Most served cuisine in Pasig City: Filipino Most served cuisine in Patna: North Indian, Chinese Most served cuisine in Paynesville: Modern Australian Most served cuisine in Penola: Cafe, Coffee and Tea, Sandwich Most served cuisine in Pensacola: Burger, Bar Food, Steak Most served cuisine in Phillip Island: Breakfast, Coffee and Tea, Modern Australian Most served cuisine in Pocatello: Mexican Most served cuisine in Potrero: American, BBQ, Burger Most served cuisine in Pretoria: French Most served cuisine in Princeton: Seafood Most served cuisine in Puducherry: Cafe Most served cuisine in Pune: Continental, North Indian, Mughlai, Burmese Most served cuisine in Quezon City: Filipino, Mexican Most served cuisine in Ranchi: North Indian, South Indian, Chinese Most served cuisine in Randburg: Cafe Most served cuisine in Rest of Hawaii: Hawaiian, Seafood, Steak Most served cuisine in Rio de Janeiro: Brazilian Most served cuisine in San Juan City: Filipino Most served cuisine in Sandton: Mexican Most served cuisine in Santa Rosa: Italian, Pizza Most served cuisine in Savannah: American, Breakfast Most served cuisine in Secunderabad: North Indian, Chinese Most served cuisine in Sharjah: American, Mexican Most served cuisine in Singapore: French Most served cuisine in Sioux City: American, Seafood, Steak Most served cuisine in Surat: South Indian Most served cuisine in SÌ£o Paulo: Brazilian Most served cuisine in Tagaytay City: Filipino Most served cuisine in Taguig City: Seafood, American, Mediterranean, Japanese Most served cuisine in Tampa Bay: American, Desserts, Steak Most served cuisine in Tangerang: Indonesian Most served cuisine in Tanunda: Modern Australian, Australian Most served cuisine in Trentham East: Australian Most served cuisine in Vadodara: Cafe Most served cuisine in Valdosta: Mexican Most served cuisine in Varanasi: Chinese, North Indian Most served cuisine in Vernonia: Coffee and Tea, Mediterranean Most served cuisine in Victor Harbor: Coffee and Tea, Tapas, Australian Most served cuisine in Vineland Station: Italian, Mediterranean, Pizza Most served cuisine in Vizag: Fast Food Most served cuisine in Waterloo: Mexican Most served cuisine in Weirton: Burger, Greek, Sandwich Most served cuisine in Wellington City: Cafe Most served cuisine in Winchester Bay: Burger, Seafood, Steak Most served cuisine in Yorkton: Asian Most served cuisine in €¡stanbul: Cafe
## Writing a function to convert all currency into USD
def convert_to_usd(currency, average_cost):
conversion_rates = { # defining the current currency conversion rates for 1 usd
'Indonesian Rupiah(IDR)': 14390.50,
'Indian Rupees(Rs.)': 74.13,
'Botswana Pula(P)': 11.07,
'Sri Lankan Rupee(LKR)': 200,
'Rand(R)': 15.39,
'Qatari Rial(QR)': 3.64,
'Dollar($)': 1,
'Emirati Diram(AED)': 3.67,
'Brazilian Real(R$)': 5.20,
'Turkish Lira(TL)': 13.16,
'Pounds(£)': 0.72,
'NewZealand($)': 1.42
}
if currency in conversion_rates:
conversion_rate = conversion_rates[currency]
converted_cost = average_cost / conversion_rate
return converted_cost
else:
return None
working_data1['converted_cost_usd'] = working_data1.apply(lambda row: convert_to_usd(row['Currency'],
row['average_cost2']), axis=1) # creating a column for converted currency
working_data1['converted_cost_usd'].head()
0 55.592231 1 55.592231 2 34.745144 3 31.270630 4 24.321601 Name: converted_cost_usd, dtype: float64
sns.boxplot(data=working_data1,x='converted_cost_usd').set(title='Cost distribution across various restaurants')
average_price=working_data1['converted_cost_usd'].mean() # finding the average rating
print()
print('Average Price across restaurants',average_price)
Average Price across restaurants 10.669980333842458
working_data1.columns #getting names of columns once again for further analysis
Index(['resturant_id', 'restaurant_name', 'country_code', 'city', 'Address',
'Locality', 'Locality Verbose', 'Longitude', 'Latitude', 'Cuisines',
'average_cost2', 'Currency', 'table_booking', 'deliver_online',
'price_range', 'agg_rating', 'Rating color', 'rating_text', 'votes',
'country', 'converted_cost_usd'],
dtype='object')
sns.boxplot(data=working_data1,x='agg_rating').set(title='Boxplot of ratings') # boxplot for rating
print()
average_rating = working_data1['agg_rating'].mean() # finding the average rating
print('Average rating:', average_rating)
Average rating: 2.6650875170317634
# Finding relation between price and rating through graph
plot = sns.jointplot(x=working_data1.converted_cost_usd, y=working_data1.agg_rating, kind='scatter')
plot.set_axis_labels('Price (USD)', 'Rating')
plot.fig.suptitle("Rating vs Price", y=1.02)
Text(0.5, 1.02, 'Rating vs Price')
plot = sns.jointplot(x=working_data1.votes, y=working_data1.agg_rating, kind='scatter') # code to plot rating vs votes
plot.set_axis_labels('Votes', 'Rating')
plot.fig.suptitle("Rating vs votes", y=1.02)
Text(0.5, 1.02, 'Rating vs votes')
sns.catplot(data=working_data1, x='table_booking', y='agg_rating', kind='bar', ci=None)
plt.xlabel('Table Booking')
plt.ylabel('Aggregate Rating')
plt.title('Rating Distribution by Table Booking')
plt.show()
plt.figure(figsize=[30, 650])
boxplot = sns.boxplot(y='Cuisines', x='agg_rating', data=working_data1, palette='Set2')
boxplot.set(title='Rating vs City')
boxplot.set_yticklabels(boxplot.get_yticklabels(), fontsize=20)
plt.show()